set.seed(2005) # for reproducibility
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.6     ✓ dplyr   1.0.4
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggthemes)
library(corrplot)
## corrplot 0.84 loaded
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(leaflet)
library(knitr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(cowplot)
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggthemes':
## 
##     theme_map
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:psych':
## 
##     outlier
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(rpart.plot)
## Loading required package: rpart
library(rpart)

# Load the country-level data set.
# The file location can be overridden with the COUNTRIES_CSV environment
# variable; when it is unset the original hard-coded path is used.
countries_csv <- Sys.getenv(
  "COUNTRIES_CSV",
  unset = "/Users/anisha/Downloads/countries.csv"
)
# Fail early with a readable message rather than read.csv()'s generic error
if (!file.exists(countries_csv)) {
  stop("Cannot find the countries data file: ", countries_csv, call. = FALSE)
}
# Text columns stay character at load time. TRUE/FALSE are spelled out
# because T/F are ordinary variables that can be reassigned.
data <- read.csv(countries_csv, encoding = "UTF-8", stringsAsFactors = FALSE)
glimpse(data)
## Rows: 188
## Columns: 21
## $ Country                        <chr> "Afghanistan", "Albania", "Algeria", "…
## $ Region                         <chr> "Middle East/Central Asia", "Northern/…
## $ Population..millions.          <dbl> 29.82, 3.16, 38.48, 20.82, 0.09, 41.09…
## $ HDI                            <dbl> 0.46, 0.73, 0.73, 0.52, 0.78, 0.83, 0.…
## $ GDP.per.Capita                 <chr> "$614.66", "$4,534.37", "$5,430.57", "…
## $ Cropland.Footprint             <dbl> 0.30, 0.78, 0.60, 0.33, NA, 0.78, 0.74…
## $ Grazing.Footprint              <dbl> 0.20, 0.22, 0.16, 0.15, NA, 0.79, 0.18…
## $ Forest.Footprint               <dbl> 0.08, 0.25, 0.17, 0.12, NA, 0.29, 0.34…
## $ Carbon.Footprint               <dbl> 0.18, 0.87, 1.14, 0.20, NA, 1.08, 0.89…
## $ Fish.Footprint                 <dbl> 0.00, 0.02, 0.01, 0.09, NA, 0.10, 0.01…
## $ Total.Ecological.Footprint     <dbl> 0.79, 2.21, 2.12, 0.93, 5.38, 3.14, 2.…
## $ Cropland                       <dbl> 0.24, 0.55, 0.24, 0.20, NA, 2.64, 0.44…
## $ Grazing.Land                   <dbl> 0.20, 0.21, 0.27, 1.42, NA, 1.86, 0.26…
## $ Forest.Land                    <dbl> 0.02, 0.29, 0.03, 0.64, NA, 0.66, 0.10…
## $ Fishing.Water                  <dbl> 0.00, 0.07, 0.01, 0.26, NA, 1.67, 0.02…
## $ Urban.Land                     <dbl> 0.04, 0.06, 0.03, 0.04, NA, 0.10, 0.07…
## $ Total.Biocapacity              <dbl> 0.50, 1.18, 0.59, 2.55, 0.94, 6.92, 0.…
## $ Biocapacity.Deficit.or.Reserve <dbl> -0.30, -1.03, -1.53, 1.61, -4.44, 3.78…
## $ Earths.Required                <dbl> 0.46, 1.27, 1.22, 0.54, 3.11, 1.82, 1.…
## $ Countries.Required             <dbl> 1.60, 1.87, 3.61, 0.37, 5.70, 0.45, 2.…
## $ Data.Quality                   <chr> "6", "6", "5", "6", "2", "6", "3B", "2…
# Print a per-column summary of the data frame (quartiles, mean and NA
# counts for numeric columns; length/class/mode for character columns)
summary(data)
##    Country             Region          Population..millions.      HDI        
##  Length:188         Length:188         Min.   :   0.000      Min.   :0.3400  
##  Class :character   Class :character   1st Qu.:   2.038      1st Qu.:0.5575  
##  Mode  :character   Mode  :character   Median :   7.970      Median :0.7200  
##                                        Mean   :  37.342      Mean   :0.6864  
##                                        3rd Qu.:  24.870      3rd Qu.:0.8025  
##                                        Max.   :1408.040      Max.   :0.9400  
##                                                              NA's   :16      
##  GDP.per.Capita     Cropland.Footprint Grazing.Footprint Forest.Footprint
##  Length:188         Min.   :0.0700     Min.   :0.0000    Min.   :0.0100  
##  Class :character   1st Qu.:0.3500     1st Qu.:0.0800    1st Qu.:0.1700  
##  Mode  :character   Median :0.5200     Median :0.1800    Median :0.2600  
##                     Mean   :0.5782     Mean   :0.2632    Mean   :0.3738  
##                     3rd Qu.:0.7000     3rd Qu.:0.3200    3rd Qu.:0.4600  
##                     Max.   :2.6800     Max.   :3.4700    Max.   :3.0300  
##                     NA's   :15         NA's   :15        NA's   :15      
##  Carbon.Footprint Fish.Footprint   Total.Ecological.Footprint    Cropland     
##  Min.   : 0.000   Min.   :0.0000   Min.   : 0.420             Min.   :0.0000  
##  1st Qu.: 0.420   1st Qu.:0.0200   1st Qu.: 1.482             1st Qu.:0.1800  
##  Median : 1.140   Median :0.0700   Median : 2.740             Median :0.3500  
##  Mean   : 1.805   Mean   :0.1225   Mean   : 3.318             Mean   :0.5319  
##  3rd Qu.: 2.600   3rd Qu.:0.1500   3rd Qu.: 4.640             3rd Qu.:0.5900  
##  Max.   :12.650   Max.   :0.8200   Max.   :15.820             Max.   :5.4200  
##  NA's   :15       NA's   :15                                  NA's   :15      
##   Grazing.Land     Forest.Land     Fishing.Water       Urban.Land     
##  Min.   :0.0000   Min.   : 0.000   Min.   : 0.0000   Min.   :0.00000  
##  1st Qu.:0.0300   1st Qu.: 0.060   1st Qu.: 0.0300   1st Qu.:0.03000  
##  Median :0.1200   Median : 0.340   Median : 0.1100   Median :0.05000  
##  Mean   :0.4566   Mean   : 2.459   Mean   : 0.5951   Mean   :0.06711  
##  3rd Qu.:0.3400   3rd Qu.: 1.170   3rd Qu.: 0.3700   3rd Qu.:0.09000  
##  Max.   :8.2300   Max.   :95.160   Max.   :16.0700   Max.   :0.27000  
##  NA's   :15       NA's   :15       NA's   :15        NA's   :15       
##  Total.Biocapacity Biocapacity.Deficit.or.Reserve Earths.Required
##  Min.   :  0.050   Min.   :-14.1400               Min.   :0.240  
##  1st Qu.:  0.675   1st Qu.: -1.9350               1st Qu.:0.855  
##  Median :  1.310   Median : -0.7300               Median :1.580  
##  Mean   :  4.020   Mean   :  0.7021               Mean   :1.916  
##  3rd Qu.:  2.815   3rd Qu.:  0.2125               3rd Qu.:2.678  
##  Max.   :111.350   Max.   :109.0100               Max.   :9.140  
##                                                                  
##  Countries.Required Data.Quality      
##  Min.   :  0.0200   Length:188        
##  1st Qu.:  0.9425   Class :character  
##  Median :  1.7050   Mode  :character  
##  Mean   :  4.0374                     
##  3rd Qu.:  2.8475                     
##  Max.   :159.4700                     
## 
# GDP is read as text such as "$4,534.37": remove "$" and "," and parse the
# remainder as a number
data$GDP.per.Capita <- as.numeric(
  gsub(pattern = '[$,]', replacement = '', x = data$GDP.per.Capita)
)

# Country and Region become factors; the data-quality grade column is dropped
data[["Country"]] <- as.factor(data[["Country"]])
data[["Region"]] <- as.factor(data[["Region"]])
data[["Data.Quality"]] <- NULL

# Table of the rows whose HDI is missing, showing only the first two columns
# (Country and Region); row names are reset so the table has no index gaps
table1_data <- data[is.na(data[["HDI"]]), 1:2]
rownames(table1_data) <- NULL
kable_styling(
  kable(table1_data, caption = "Countries with Missing Data"),
  "striped"
)
## Countries with Missing Data
## Country Region
## Aruba Latin America
## Bermuda North America
## British Virgin Islands Latin America
## Cayman Islands Latin America
## Côte d’Ivoire Africa
## French Guiana Latin America
## French Polynesia Asia-Pacific
## Guadeloupe Latin America
## Korea, Democratic People’s Republic of Asia-Pacific
## Martinique Latin America
## Montserrat Latin America
## Nauru Asia-Pacific
## New Caledonia Asia-Pacific
## Réunion Africa
## Somalia Africa
## Wallis and Futuna Islands Asia-Pacific
#Visualizing Numeric Variables for Skewness
# One histogram per numeric column; vapply() yields the logical column mask
multi.hist(data[, vapply(data, is.numeric, logical(1))])

# Density-scaled histogram of Total.Ecological.Footprint with a kernel density
# overlay; the dotted vertical line marks the mean of the column.
# after_stat(density) replaces the deprecated ..density.. notation.
ggplot(data, aes(x = Total.Ecological.Footprint)) +
  geom_histogram(aes(y = after_stat(density)), bins = 30, fill = "indianred1") +
  geom_density(alpha = 0.2, fill = "indianred1") +
  theme_fivethirtyeight() +
  ggtitle("Distribution of Total Ecological Footprint") +
  # NOTE(review): presumably re-enables axis titles that the theme hides
  theme(axis.title = element_text(), axis.title.x = element_text()) +
  geom_vline(
    xintercept = mean(data$Total.Ecological.Footprint),
    size = 2, linetype = 3
  ) +
  annotate("text", x = 7, y = 0.35, label = "Average Footprint per Capita")

# Density-scaled histogram of Total.Biocapacity with a kernel density overlay;
# the dotted vertical line marks the mean of the column.
# Fixes: the density fill colour had a stray leading space (" lightgreen"),
# and after_stat(density) replaces the deprecated ..density.. notation.
ggplot(data, aes(x = Total.Biocapacity)) +
  geom_histogram(aes(y = after_stat(density)), bins = 30, fill = "lightgreen") +
  geom_density(alpha = 0.2, fill = "lightgreen") +
  theme_fivethirtyeight() +
  ggtitle("Distribution of Total Biocapacity") +
  # NOTE(review): presumably re-enables axis titles that the theme hides
  theme(axis.title = element_text(), axis.title.x = element_text()) +
  geom_vline(
    xintercept = mean(data$Total.Biocapacity),
    size = 2, linetype = 3
  ) +
  annotate("text", x = 7, y = 0.35, label = "Avg Biocapacity per Person")

# Density-scaled histogram of Biocapacity.Deficit.or.Reserve with a kernel
# density overlay; the dotted vertical line marks the mean of the column.
# Fixes: title typo ("Ecoloigical"), and after_stat(density) replaces the
# deprecated ..density.. notation.
ggplot(data, aes(x = Biocapacity.Deficit.or.Reserve)) +
  geom_histogram(aes(y = after_stat(density)), bins = 30, fill = "lightyellow") +
  geom_density(alpha = 0.2, fill = "lightyellow") +
  theme_fivethirtyeight() +
  ggtitle("Distribution of Ecological Deficit/Reserve") +
  # NOTE(review): presumably re-enables axis titles that the theme hides
  theme(axis.title = element_text(), axis.title.x = element_text()) +
  geom_vline(
    xintercept = mean(data$Biocapacity.Deficit.or.Reserve),
    size = 2, linetype = 3
  ) +
  annotate("text", x = 7, y = 0.35, label = "Avg Deficit/Reserve per Person")

# Draw a horizontal bar chart of the countries ranked highest (or lowest) on
# one numeric column.
#
# df           data frame with a `Country` column and the column in `value`
# value        unquoted name of the numeric column to rank by
# n            number of countries to show
# fill         bar colour
# y_label      axis label for the ranked value
# title        plot title
# lowest       FALSE: take the n largest values; TRUE: take the n smallest
# label_size   text size of the "(value)" label drawn on each bar
# label_vjust  vertical justification of that label
#
# Returns the ggplot object (auto-printed when called at top level).
plot_country_ranking <- function(df, value, n, fill, y_label, title,
                                 lowest = FALSE, label_size = 4,
                                 label_vjust = 0.5) {
  ranked <- df %>%
    group_by(Country) %>%
    summarise(Median = median({{ value }}, na.rm = TRUE)) %>%
    ungroup() %>%
    # arrange() sorts NA last, so without this tail() would pick up NA rows
    dplyr::filter(!is.na(Median)) %>%
    # Order the factor levels by value so the bars are drawn sorted
    mutate(Country = reorder(Country, Median)) %>%
    arrange(desc(Median))

  shown <- if (lowest) tail(ranked, n) else head(ranked, n)

  ggplot(shown, aes(x = Country, y = Median)) +
    geom_col(fill = fill) +
    # paste0() has no `sep` argument; the former sep = "" was simply pasted
    # on as an extra empty string
    geom_text(aes(x = Country, y = 1, label = paste0("(", Median, ")")),
              hjust = 0, vjust = label_vjust, size = label_size,
              colour = "black", fontface = "italic") +
    labs(x = "Countries", y = y_label, title = title) +
    coord_flip() +
    # "none" is the documented value for hiding the legend ("" is not one)
    theme(legend.position = "none")
}

# Ecological footprint: 20 highest and 20 lowest
plot_country_ranking(data, Total.Ecological.Footprint, n = 20,
                     fill = "indianred1",
                     y_label = "Ecological Footprint",
                     title = "Countries With Highest Footprint")

plot_country_ranking(data, Total.Ecological.Footprint, n = 20,
                     fill = "indianred1",
                     y_label = "Ecological Footprint",
                     title = "Countries With Lowest Footprint",
                     lowest = TRUE, label_size = 2)

# Biocapacity: 20 highest
plot_country_ranking(data, Total.Biocapacity, n = 20,
                     fill = "lightgreen",
                     y_label = "Biocapacity",
                     title = "Countries With Highest Biocapacities")

# Deficit/reserve: 10 largest reserves and 10 largest deficits
plot_country_ranking(data, Biocapacity.Deficit.or.Reserve, n = 10,
                     fill = "green",
                     y_label = "Ecological Reserves",
                     title = "Countries With Largest Ecological Reserves")

plot_country_ranking(data, Biocapacity.Deficit.or.Reserve, n = 10,
                     fill = "indianred1",
                     y_label = "Ecological Deficits",
                     title = "Countries With Largest Ecological Deficits",
                     lowest = TRUE, label_size = 1.5, label_vjust = 0.4)

# Population, GDP per capita and HDI: 10 highest each
plot_country_ranking(data, Population..millions., n = 10,
                     fill = "yellow",
                     y_label = "Population (in millions)",
                     title = "Countries with Highest Population",
                     label_size = 1.5, label_vjust = 0.4)

plot_country_ranking(data, GDP.per.Capita, n = 10,
                     fill = "olivedrab",
                     y_label = "GDP per Capita",
                     title = "Countries with Highest GDP per Capita",
                     label_size = 1.5, label_vjust = 0.4)

plot_country_ranking(data, HDI, n = 10,
                     fill = "violetred",
                     y_label = "HDI",
                     title = "Countries with Highest HDI",
                     label_size = 1.5, label_vjust = 0.4)

#Correlation Plots
# Numeric columns only, restricted to rows without any missing value
k <- data[, vapply(data, is.numeric, logical(1))]
k <- k[complete.cases(k), ]
korelacija <- cor(k, method = "pearson")
corrplot(korelacija, method = "color", tl.cex = 0.825,
         title = "Pearson's Correlation", mar = c(0, 0, 1, 0))

# Same complete-case numeric table, rank-based (Spearman) correlation
k2 <- k
korelacija2 <- cor(k2, method = "spearman")
corrplot(korelacija2, method = "color", tl.cex = 0.825,
         title = "Spearman's Correlation", mar = c(0, 0, 1, 0))

#Regional Breakdowns:

#Number of Countries in each Region
# Count the rows per region and draw one horizontal bar per region, sorted by
# count, with the count printed in bold near the base of each bar
data %>%
  count(Region) %>%
  ggplot(aes(x = reorder(Region, n), y = n)) +
  geom_col(fill = "lightsalmon") +
  theme_fivethirtyeight() +
  ggtitle("Number of Countries in each Region") +
  geom_text(aes(x = Region, y = 1, label = as.character(n)),
            hjust = 0.15, vjust = 0.5, size = 4, colour = 'black',
            fontface = 'bold') +
  coord_flip()

#Total Footprint of each Region
# Sum Total.Ecological.Footprint within each region and draw one bar per
# region. geom_col() is the bar geom for pre-computed heights; the former
# geom_histogram(stat = "identity") drew the same bars but warned about
# ignored binning parameters.
data %>%
  group_by(Region) %>%
  summarise(Total = sum(Total.Ecological.Footprint)) %>%
  ggplot(aes(Region, Total, fill = Region)) +
  geom_col() +
  theme_fivethirtyeight() +
  ggtitle("Total Ecological Footprint by Region") +
  # Tilt the region names so they do not overlap
  theme(axis.text.x = element_text(angle = 20, vjust = 0.65))

#Biocapacities in each Region
# Sum Total.Biocapacity within each region and draw one bar per region.
# geom_col() is the bar geom for pre-computed heights; the former
# geom_histogram(stat = "identity") drew the same bars but warned about
# ignored binning parameters.
data %>%
  group_by(Region) %>%
  summarise(Total = sum(Total.Biocapacity)) %>%
  ggplot(aes(Region, Total, fill = Region)) +
  geom_col() +
  theme_fivethirtyeight() +
  ggtitle("Total Biocapacity by Region") +
  # Tilt the region names so they do not overlap
  theme(axis.text.x = element_text(angle = 20, vjust = 0.65))

#Ecological Deficit/Reserve in each Region
# Sum Biocapacity.Deficit.or.Reserve within each region and draw one bar per
# region. geom_col() is the bar geom for pre-computed heights; the former
# geom_histogram(stat = "identity") drew the same bars but warned about
# ignored binning parameters.
data %>%
  group_by(Region) %>%
  summarise(Total = sum(Biocapacity.Deficit.or.Reserve)) %>%
  ggplot(aes(Region, Total, fill = Region)) +
  geom_col() +
  theme_fivethirtyeight() +
  ggtitle("Total Deficit/Reserve by Region") +
  # Tilt the region names so they do not overlap
  theme(axis.text.x = element_text(angle = 20, vjust = 0.65))

#Plotting Footprint over HDI by Region and Population
# One point per (HDI, Region) combination: rows sharing both values are
# collapsed to their median footprint and median population.
# NOTE(review): the title speaks of countries, so plotting `data` directly
# (one point per country) may be what was intended - confirm.
# .groups = "drop" returns an ungrouped table instead of one still grouped by
# HDI, which also silences the summarise() grouping message.
by_region <- data %>%
  group_by(HDI, Region) %>%
  summarize(FootprintMedian = median(`Total.Ecological.Footprint`),
            PopulationMedian = median(`Population..millions.`),
            .groups = "drop")
# Colour encodes region, point size encodes population. The earlier duplicate
# ylab("Ecological Footprint") was dead code: the later ylab() replaced it.
ggplot(by_region, aes(x = HDI, y = FootprintMedian, color = Region, size = PopulationMedian)) +
  geom_point(alpha = 0.7) +
  labs(title = "Countries' Footprints over HDI by Region and Population") +
  scale_color_manual(values = c("mediumseagreen", "orange", "mediumpurple", "mediumvioletred", "lightskyblue", "khaki1", "khaki4")) +
  xlab("HDI") +
  ylab("Footprint")
## Warning: Removed 4 rows containing missing values (geom_point).

#Plotting Footprint over GDP by Region and Population
# One point per (GDP.per.Capita, Region) combination: rows sharing both values
# are collapsed to their median footprint and median population.
# .groups = "drop" returns an ungrouped table instead of one still grouped by
# GDP.per.Capita, which also silences the summarise() grouping message.
by_region <- data %>%
  group_by(GDP.per.Capita, Region) %>%
  summarize(FootprintMedian = median(`Total.Ecological.Footprint`),
            PopulationMedian = median(`Population..millions.`),
            .groups = "drop")
# Colour encodes region, point size encodes population. The earlier duplicate
# ylab("Ecological Footprint") was dead code: the later ylab() replaced it.
ggplot(by_region, aes(x = GDP.per.Capita, y = FootprintMedian, color = Region, size = PopulationMedian)) +
  geom_point(alpha = 0.7) +
  labs(title = "Countries' Footprints over GDP per Capita by Region and Population") +
  scale_color_manual(values = c("mediumseagreen", "orange", "mediumpurple", "mediumvioletred", "lightskyblue", "khaki1", "khaki4")) +
  xlab("GDP") +
  ylab("Footprint")
## Warning: Removed 4 rows containing missing values (geom_point).

#Plotting Deficit/Reserve over HDI by Region and Population
# One point per (HDI, Region) combination: rows sharing both values are
# collapsed to their median deficit/reserve and median population.
# .groups = "drop" returns an ungrouped table instead of one still grouped by
# HDI, which also silences the summarise() grouping message.
by_region <- data %>%
  group_by(HDI, Region) %>%
  summarize(DeficitMedian = median(`Biocapacity.Deficit.or.Reserve`),
            PopulationMedian = median(`Population..millions.`),
            .groups = "drop")
# Colour encodes region, point size encodes population. The earlier duplicate
# ylab("Ecological Deficit/Reserve") was dead code: the later ylab() replaced it.
ggplot(by_region, aes(x = HDI, y = DeficitMedian, color = Region, size = PopulationMedian)) +
  geom_point(alpha = 0.7) +
  labs(title = "Countries' Deficit/Reserve over HDI by Region and Population") +
  scale_color_manual(values = c("mediumseagreen", "orange", "mediumpurple", "mediumvioletred", "lightskyblue", "khaki1", "khaki4")) +
  xlab("HDI") +
  ylab("Deficit/Reserve")
## Warning: Removed 4 rows containing missing values (geom_point).

#Plotting Deficit/Reserve over GDP by Region and Population
# One point per (GDP.per.Capita, Region) combination: rows sharing both values
# are collapsed to their median deficit/reserve and median population.
# .groups = "drop" returns an ungrouped table instead of one still grouped by
# GDP.per.Capita, which also silences the summarise() grouping message.
by_region <- data %>%
  group_by(GDP.per.Capita, Region) %>%
  summarize(DeficitMedian = median(`Biocapacity.Deficit.or.Reserve`),
            PopulationMedian = median(`Population..millions.`),
            .groups = "drop")
# Colour encodes region, point size encodes population. The earlier duplicate
# ylab("Ecological Deficit/Reserve") was dead code: the later ylab() replaced it.
ggplot(by_region, aes(x = GDP.per.Capita, y = DeficitMedian, color = Region, size = PopulationMedian)) +
  geom_point(alpha = 0.7) +
  labs(title = "Countries' Deficit/Reserve over GDP per Capita by Region and Population") +
  scale_color_manual(values = c("mediumseagreen", "orange", "mediumpurple", "mediumvioletred", "lightskyblue", "khaki1", "khaki4")) +
  xlab("GDP") +
  ylab("Deficit/Reserve")
## Warning: Removed 4 rows containing missing values (geom_point).

#Plotting GDP over HDI by Region and Population
# One point per (HDI, Region) combination: rows sharing both values are
# collapsed to their median GDP per capita and median population.
# .groups = "drop" returns an ungrouped table instead of one still grouped by
# HDI, which also silences the summarise() grouping message.
by_region <- data %>%
  group_by(HDI, Region) %>%
  summarize(GDPMedian = median(`GDP.per.Capita`),
            PopulationMedian = median(`Population..millions.`),
            .groups = "drop")
# Colour encodes region, point size encodes population.
# Title fixes: "Developent" typo, and the wording now matches the axes
# (GDP per capita on y, HDI on x). The duplicate ylab() call is removed.
ggplot(by_region, aes(x = HDI, y = GDPMedian, color = Region, size = PopulationMedian)) +
  geom_point(alpha = 0.7) +
  labs(title = "Countries' GDP Per Capita over Human Development Index by Region and Population") +
  scale_color_manual(values = c("mediumseagreen", "orange", "mediumpurple", "mediumvioletred", "lightskyblue", "khaki1", "khaki4")) +
  xlab("HDI") +
  ylab("GDP Per Capita")
## Warning: Removed 5 rows containing missing values (geom_point).

#Linear Regression Model

# Baseline fit: footprint as a straight-line function of HDI alone
LinearModelHDI <- lm(
  formula = Total.Ecological.Footprint ~ HDI,
  data = data
)
summary(LinearModelHDI)
## 
## Call:
## lm(formula = Total.Ecological.Footprint ~ HDI, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6459 -0.9784 -0.3301  0.6633 10.3107 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -4.3021     0.5423  -7.933 2.74e-13 ***
## HDI          11.0241     0.7706  14.306  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.572 on 170 degrees of freedom
##   (16 observations deleted due to missingness)
## Multiple R-squared:  0.5462, Adjusted R-squared:  0.5436 
## F-statistic: 204.7 on 1 and 170 DF,  p-value: < 2.2e-16
# Footprint regressed on exp(HDI) as the single predictor; I() makes the
# transformation explicit inside the formula
LinearModelHDI2 <- lm(
  formula = Total.Ecological.Footprint ~ I(exp(HDI)),
  data = data
)
summary(LinearModelHDI2)
## 
## Call:
## lm(formula = Total.Ecological.Footprint ~ I(exp(HDI)), data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5625 -0.9288 -0.4152  0.5897 10.0971 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8.3649     0.7785  -10.74   <2e-16 ***
## I(exp(HDI))   5.7853     0.3829   15.11   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.525 on 170 degrees of freedom
##   (16 observations deleted due to missingness)
## Multiple R-squared:  0.5731, Adjusted R-squared:  0.5706 
## F-statistic: 228.3 on 1 and 170 DF,  p-value: < 2.2e-16
# Quadratic fit in HDI: linear term plus squared term
# (`^` and `**` are the same operator to the R parser)
LinearModelHDI3 <- lm(
  formula = Total.Ecological.Footprint ~ HDI + I(HDI^2),
  data = data
)
summary(LinearModelHDI3)
## 
## Call:
## lm(formula = Total.Ecological.Footprint ~ HDI + I(HDI^2), data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5236 -0.8254 -0.2381  0.3253  9.5480 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    6.502      1.985   3.275 0.001280 ** 
## HDI          -23.827      6.238  -3.820 0.000188 ***
## I(HDI^2)      26.481      4.709   5.623 7.61e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.447 on 169 degrees of freedom
##   (16 observations deleted due to missingness)
## Multiple R-squared:  0.6178, Adjusted R-squared:  0.6132 
## F-statistic: 136.6 on 2 and 169 DF,  p-value: < 2.2e-16
# Footprint against HDI with a straight-line fit (red, linetype 1) and a
# quadratic fit (blue, linetype 2). The `text` aesthetic is not used by
# ggplot2 itself; it carries the country name for the plotly tooltip.
linearRegression <- ggplot(
  data,
  aes(x = HDI, y = Total.Ecological.Footprint)
) +
  geom_point(aes(text = Country)) +
  geom_smooth(method = "lm", se = FALSE, linetype = 1, color = "red") +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2), se = FALSE,
              linetype = 2, color = "blue") +
  ggtitle("Simple Linear Regression Model (HDI)")
## Warning: Ignoring unknown aesthetics: text
ggplotly(p = linearRegression, tooltip = "text")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
# Quadratic fit of footprint on GDP per capita (linear plus squared term)
LinearModelGDP <- lm(
  formula = Total.Ecological.Footprint ~ GDP.per.Capita + I(GDP.per.Capita^2),
  data = data
)
summary(LinearModelGDP)
## 
## Call:
## lm(formula = Total.Ecological.Footprint ~ GDP.per.Capita + I(GDP.per.Capita^2), 
##     data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.3315 -0.7689 -0.2931  0.6164  6.3890 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          1.738e+00  1.432e-01  12.139  < 2e-16 ***
## GDP.per.Capita       1.344e-04  1.284e-05  10.475  < 2e-16 ***
## I(GDP.per.Capita^2) -5.874e-10  1.532e-10  -3.835 0.000177 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.351 on 170 degrees of freedom
##   (15 observations deleted due to missingness)
## Multiple R-squared:  0.6678, Adjusted R-squared:  0.6639 
## F-statistic: 170.9 on 2 and 170 DF,  p-value: < 2.2e-16
# Footprint against GDP per capita with a straight-line fit (red, linetype 1)
# and a quadratic fit (blue, linetype 2). The `text` aesthetic carries the
# country name for the plotly tooltip.
linearRegression2 <- ggplot(
  data,
  aes(x = GDP.per.Capita, y = Total.Ecological.Footprint)
) +
  geom_point(aes(text = Country)) +
  geom_smooth(method = "lm", se = FALSE, linetype = 1, color = "red") +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2), se = FALSE,
              linetype = 2, color = "blue") +
  ggtitle("Simple Linear Regression Model (GDP per Capita)")
## Warning: Ignoring unknown aesthetics: text
ggplotly(p = linearRegression2, tooltip = "text")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
# Multiple regression of footprint on GDP per capita and HDI, each entered
# with a linear and a squared term
MultipleModel <- lm(
  formula = Total.Ecological.Footprint ~ GDP.per.Capita + HDI +
    I(GDP.per.Capita^2) + I(HDI^2),
  data = data
)
summary(MultipleModel)
## 
## Call:
## lm(formula = Total.Ecological.Footprint ~ GDP.per.Capita + HDI + 
##     I(GDP.per.Capita^2) + I(HDI^2), data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8412 -0.7107 -0.1608  0.5324  5.3628 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)   
## (Intercept)         -2.853e+00  2.263e+00  -1.261  0.20920   
## GDP.per.Capita       8.304e-05  2.503e-05   3.318  0.00111 **
## HDI                  1.100e+01  7.758e+00   1.419  0.15791   
## I(GDP.per.Capita^2) -1.535e-10  2.077e-10  -0.739  0.46083   
## I(HDI^2)            -5.035e+00  6.603e+00  -0.762  0.44690   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.259 on 166 degrees of freedom
##   (17 observations deleted due to missingness)
## Multiple R-squared:  0.715,  Adjusted R-squared:  0.7081 
## F-statistic: 104.1 on 4 and 166 DF,  p-value: < 2.2e-16
#Note: Population was not used because its Pearson/Spearman correlations with Footprint were very low

# Deficit/reserve regressed on total footprint and total biocapacity, each
# with a linear and a squared term.
# NOTE(review): the fitted coefficients (-1 and +1, R-squared of 1) suggest
# the response is simply biocapacity minus footprint - confirm whether this
# model is informative.
MultipleModel2 <- lm(
  formula = Biocapacity.Deficit.or.Reserve ~ Total.Ecological.Footprint +
    Total.Biocapacity + I(Total.Ecological.Footprint^2) +
    I(Total.Biocapacity^2),
  data = data
)
summary(MultipleModel2)
## 
## Call:
## lm(formula = Biocapacity.Deficit.or.Reserve ~ Total.Ecological.Footprint + 
##     Total.Biocapacity + I(Total.Ecological.Footprint^2) + I(Total.Biocapacity^2), 
##     data = data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0101987 -0.0001518 -0.0000773  0.0001638  0.0105052 
## 
## Coefficients:
##                                   Estimate Std. Error   t value Pr(>|t|)    
## (Intercept)                      2.374e-04  9.140e-04     0.260    0.795    
## Total.Ecological.Footprint      -1.000e+00  4.190e-04 -2386.979   <2e-16 ***
## Total.Biocapacity                9.999e-01  1.084e-04  9222.256   <2e-16 ***
## I(Total.Ecological.Footprint^2) -1.369e-06  3.644e-05    -0.038    0.970    
## I(Total.Biocapacity^2)           4.949e-07  1.128e-06     0.439    0.661    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.005117 on 183 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 2.474e+08 on 4 and 183 DF,  p-value: < 2.2e-16
# Footprint against Cropland with a straight-line fit (red, linetype 1) and a
# quadratic fit (blue, linetype 2). The `text` aesthetic carries the country
# name for the plotly tooltip.
linearRegression3 <- ggplot(
  data,
  aes(x = Cropland, y = Total.Ecological.Footprint)
) +
  geom_point(aes(text = Country)) +
  geom_smooth(method = "lm", se = FALSE, linetype = 1, color = "red") +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2), se = FALSE,
              linetype = 2, color = "blue") +
  ggtitle("Simple Linear Regression Model (Cropland)")
## Warning: Ignoring unknown aesthetics: text
ggplotly(p = linearRegression3, tooltip = "text")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).

## Warning: Removed 15 rows containing non-finite values (stat_smooth).
# Footprint against Forest.Land with a straight-line fit (red, linetype 1)
# and a quadratic fit (blue, linetype 2). The `text` aesthetic carries the
# country name for the plotly tooltip.
linearRegression4 <- ggplot(
  data,
  aes(x = Forest.Land, y = Total.Ecological.Footprint)
) +
  geom_point(aes(text = Country)) +
  geom_smooth(method = "lm", se = FALSE, linetype = 1, color = "red") +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2), se = FALSE,
              linetype = 2, color = "blue") +
  ggtitle("Simple Linear Regression Model (Forest Land)")
## Warning: Ignoring unknown aesthetics: text
ggplotly(p = linearRegression4, tooltip = "text")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).

## Warning: Removed 15 rows containing non-finite values (stat_smooth).
# Footprint against Fishing.Water with a straight-line fit (red, linetype 1)
# and a quadratic fit (blue, linetype 2). The `text` aesthetic carries the
# country name for the plotly tooltip.
# Fix: the title said "Fishing Land" although the plotted column is
# Fishing.Water.
linearRegression5 <- ggplot(
  data,
  aes(x = Fishing.Water, y = Total.Ecological.Footprint)
) +
  geom_point(aes(text = Country)) +
  geom_smooth(method = "lm", se = FALSE, linetype = 1, color = "red") +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2), se = FALSE,
              linetype = 2, color = "blue") +
  ggtitle("Simple Linear Regression Model (Fishing Water)")
## Warning: Ignoring unknown aesthetics: text
ggplotly(p = linearRegression5, tooltip = "text")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).

## Warning: Removed 15 rows containing non-finite values (stat_smooth).